In [107]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [108]:
def sigmoid(w_vector, x_vector, return_deriv=True):
    # Logistic function applied to the linear score x . w
    z = x_vector.dot(w_vector)
    sig = 1.0 / (1.0 + np.exp(-z))
    # The sigmoid has the convenient derivative sig * (1 - sig)
    deriv = sig * (1 - sig)
    if return_deriv:
        return sig, deriv
    else:
        return sig
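A quick sanity check of the sigmoid (a minimal sketch using hypothetical test vectors, not part of the assignment): at a score of zero the sigmoid is 0.5 and its derivative is 0.25.
In [ ]:
# With zero weights the score is 0 for any input,
# so we expect sigmoid = 0.5 and derivative = 0.25
w_test = np.zeros(3)
x_test = np.ones(3)
sigmoid(w_test, x_test)  # expect (0.5, 0.25)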
In [218]:
from sklearn.datasets import make_classification
from matplotlib.colors import ListedColormap
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, n_clusters_per_class=1, random_state=26)
# Two-color map for the binary class labels
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
Out[218]:
In [219]:
X.shape
Out[219]:
In [220]:
# Add a dummy feature (a column of ones) to the design matrix for the bias term in the linear model
X = np.concatenate([np.ones((X.shape[0], 1)), X], axis=1)
X.shape
Out[220]:
In [232]:
# Let's look at the first three training examples
# you can think of these features as x0, x1, x2
# where x0 is a dummy feature that we are using for the bias term in the logistic regression model
X[:3]
Out[232]:
In [221]:
# Randomly initialize the weights (coefficients) for the logistic regression model:
# start from np.zeros(3) and add a little noise generated with np.random.uniform
weight_vector = np.zeros(3) + np.random.uniform(-0.01, 0.01, size=3)  # the noise scale is an illustrative choice
weight_vector
Out[221]:
In [223]:
weight_vector.shape
Out[223]:
In [222]:
# Let's import accuracy_score from sklearn.metrics to keep track of how well we can predict y using X.
from sklearn.metrics import accuracy_score
print(X.dot(weight_vector)[:5])
# Predictions come from rounding the sigmoid of the linear score, not the raw score itself
accuracy_score(np.round(sigmoid(weight_vector, X, return_deriv=False)), y)
Out[222]:
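Before any training the scores sit near zero because of the near-zero initialization, so the predicted probabilities should all hover around 0.5 (a quick check, not required by the assignment):
In [ ]:
# Near-zero weights give near-zero scores, hence probabilities close to 0.5
sigmoid(weight_vector, X, return_deriv=False)[:5]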
In [224]:
# For each iteration of the fit, given data point i, the update for each weight j (coefficient)
# of the logistic regression model is
#   weight_j += learning_rate * error_i * derivative_of_the_sigmoid_for_y_hat_i * x_ij
# All of the weights should be updated simultaneously.
def perform_gradient_descent(weight_vector, X, y, batch_size, num_iterations=100, learning_rate=0.1):
    n = X.shape[0]
    for _ in range(num_iterations):
        # Sample a mini-batch and move the weight vector in the direction of minimum error
        batch = np.random.choice(n, size=batch_size, replace=False)
        y_hat, deriv = sigmoid(weight_vector, X[batch])
        error = y[batch] - y_hat
        # Vectorized form of the per-weight rule above: all weights update simultaneously
        weight_vector += learning_rate * X[batch].T.dot(error * deriv)
        # Print the current accuracy given the state of the weight vector after the update.
        # It should trend upward, though it may bounce around the optimal value depending on
        # the batch size and on whether the data is linearly separable.
        print(accuracy_score(np.round(sigmoid(weight_vector, X, return_deriv=False)), y))
    return weight_vector
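A sketch of a fitting run (the batch_size, num_iterations, and learning_rate values below are illustrative assumptions, not prescribed by the assignment); the final accuracy should beat the untrained baseline above.
In [ ]:
# Illustrative hyperparameters; tune these for your data
weight_vector = perform_gradient_descent(weight_vector, X, y, batch_size=20,
                                         num_iterations=100, learning_rate=0.1)
accuracy_score(np.round(sigmoid(weight_vector, X, return_deriv=False)), y)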
In [ ]:
# Task #1
In [ ]:
# Task #2
In [ ]:
# Task #3